---
title: "Big Figure"
output: html_document
---

#get adult data and process
```{r setup, include=FALSE}
library(DESeq2)
library("magrittr")
library("dplyr")
library("ggplot2")
library("reshape2")
library("RColorBrewer")
library("gplots")

matrix = read.table("adult2.counts.matrix")
samples = read.table("adults2.txt", sep="\t", header=TRUE)
samples$ID = samples$good


adults_new = matrix %>% mutate(Total = dplyr::select(., P10F:P9FD) %>% rowSums(na.rm = TRUE))
rownames(adults_new) = rownames(matrix)
adults_new = subset(adults_new, adults_new[ , 18] > 10, drop=FALSE ) 
adults_new = adults_new[,1:17]


matrix = as.matrix(adults_new)
matrix2 = round(matrix)

annotation = read.csv("/Users/emmaberdan/Documents/RNA study/New Transcriptome/new_trinotate_annotation_report.csv", header=TRUE, sep=",")

```

#run DESEQ for all adults
```{r}
dds <- DESeqDataSetFromMatrix(countData = matrix2,
                              colData = samples,
                              design = ~ Sex + Genotype + Sex*Genotype)

dds$Genotype <- relevel(dds$Genotype, ref = "BB")

dds = DESeq(dds)
resultsNames(dds)
plotDispEsts(dds)
```


#separate out genotype and sex significant DE genes alpha = 0.05 and logfold min = 2
```{r}
res_geno <- results(dds, name="Genotype_AA_vs_BB", alpha=0.05) %>% as.data.frame
resx_geno <- res_geno %>% subset(padj < 0.05 & abs(log2FoldChange)>2) %>% as.data.frame

res_sex <- results(dds, name="Sex_Male_vs_Female", alpha=0.05)  %>%  as.data.frame
resx_sex <- res_sex %>% subset(padj < 0.05 & abs(log2FoldChange)>2) %>% as.data.frame


```



#Do just males
```{r}
matrix = read.table("adult2.counts.matrix")
samples = read.table("adults2.txt", sep="\t", header=TRUE)
samples$ID = samples$good


adults_new = matrix %>% mutate(Total = dplyr::select(., P10F:P9FD) %>% rowSums(na.rm = TRUE))
rownames(adults_new) = rownames(matrix)
adults_new = subset(adults_new, adults_new[ , 18] > 10, drop=FALSE ) 
adults_new = adults_new[,1:17]


matrix = as.matrix(adults_new)
matrix2 = round(matrix)



dds <- DESeqDataSetFromMatrix(countData = matrix2,
                              colData = samples,
                              design = ~ Genotype)

colnames(dds)
#keep males
dds <- dds[,c(2,5,7,10,12,13,14,16)]

dds$Genotype <- relevel(dds$Genotype, ref = "BB")

dds = DESeq(dds)
resultsNames(dds)
plotDispEsts(dds)

```

#separate out genotype significant DE genes alpha = 0.05 and logfold min = 2
```{r}
res_Mgeno <- results(dds, name="Genotype_AA_vs_BB", alpha=0.05) %>% as.data.frame
resx_Mgeno <- res_Mgeno %>% subset(padj < 0.05 & abs(log2FoldChange)>2) %>% as.data.frame
sig_Mgeno = resx_Mgeno[,c(2,6)]


```

#Do just females
```{r}
matrix = read.table("adult2.counts.matrix")
samples = read.table("adults2.txt", sep="\t", header=TRUE)
samples$ID = samples$good


adults_new = matrix %>% mutate(Total = dplyr::select(., P10F:P9FD) %>% rowSums(na.rm = TRUE))
rownames(adults_new) = rownames(matrix)
adults_new = subset(adults_new, adults_new[ , 18] > 10, drop=FALSE ) 
adults_new = adults_new[,1:17]


matrix = as.matrix(adults_new)
matrix2 = round(matrix)



dds <- DESeqDataSetFromMatrix(countData = matrix2,
                              colData = samples,
                              design = ~ Genotype)

colnames(dds)
#keep females
dds <- dds[,c(1,3,4,6,8,9,11,15,17)]

dds$Genotype <- relevel(dds$Genotype, ref = "BB")

dds = DESeq(dds)
resultsNames(dds)
plotDispEsts(dds)

```


#separate out genotype alpha = 0.05 and logfold min = 2
```{r}
res_Fgeno <- results(dds, name="Genotype_AA_vs_BB", alpha=0.05) %>% as.data.frame
resx_Fgeno <- res_Fgeno %>% subset(padj < 0.05 & abs(log2FoldChange)>2) %>% as.data.frame

sig_Fgeno = resx_Fgeno[,c(2,6)]

```



#Larvae 
```{r}
library(DESeq2)
library("magrittr")
library("dplyr")
library("ggplot2")
library("reshape2")
library("RColorBrewer")
library("gplots")
library("ggbio")

matrix = read.table("larvae.counts.matrix")
samples = read.table("larvae.txt", sep="\t", header=TRUE, row.names = 1)
samples$ID = rownames(samples)

# remove all rows with less than 10 total counts
larvae_new = matrix %>% mutate(Total = dplyr::select(., L1:YBB2) %>% rowSums(na.rm = TRUE))
rownames(larvae_new) = rownames(matrix)
larvae_new = subset(larvae_new, larvae_new[ , 29] > 10, drop=FALSE ) 
larvae_new = larvae_new[,1:28]

matrix = as.matrix(larvae_new)
matrix2 = round(matrix)


dds <- DESeqDataSetFromMatrix(countData = matrix2,
                              colData = samples,
                              design = ~ Genotype + Population)
#remove L3v1

dds <- dds[,-11]

#remove SKBB1
dds <- dds[,-24]


dds$Genotype <- relevel(dds$Genotype, ref = "BB")


dds = DESeq(dds)

resultsNames(dds)
plotDispEsts(dds)

```

#separate out genotype alpha = 0.05 and logfold min = 2
```{r}
res_geno_AAvsBB <- results(dds, contrast=c("Genotype","AA","BB"), alpha=0.05) %>% as.data.frame
resx_geno_AAvsBB <- res_geno_AAvsBB %>% subset(padj < 0.05) %>% as.data.frame

sig_AABB = resx_geno_AAvsBB[,c(2,6)]
sig_AABB$contig = rownames(sig_AABB)

res_geno_ABvsBB <- results(dds, contrast=c("Genotype","AB","BB"), alpha=0.05)  %>% as.data.frame
resx_geno_ABvsBB <- res_geno_ABvsBB %>% subset(padj < 0.05) %>% as.data.frame

sig_ABBB = resx_geno_ABvsBB[,c(2,6)]
sig_ABBB$contig = rownames(sig_ABBB)
```

#Get position info
```{r}

library(splitstackshape)
library(dplyr)

genome_annot<-read.delim("gmap.gff3", header=F)
genome_annot<-genome_annot[genome_annot$V3=="gene",c(1,3,4,5,9)]
colnames(genome_annot)<-c("LG","gene","start","stop","transcript")
head(genome_annot)
genome_split = cSplit(genome_annot, "transcript", sep = ";")
genome_split = cSplit(genome_split, "transcript_2", sep = "=")
head(genome_split)
genome_annot2<-genome_split[,c(1,3,4,7)]
colnames(genome_annot2)[4]<-"id_transcript"
head(genome_annot2)
dim(genome_annot2)
genome_annot3<-genome_annot2 %>% distinct(id_transcript, .keep_all = TRUE)
dim(genome_annot3)
```

#merge for all adults

```{r}
#DE adults
res_geno$id_transcript = rownames(res_geno)
DE_pos<-left_join(res_geno, genome_annot2)
head(DE_pos)

DE_pos_no_na<-DE_pos[-(which(is.na(DE_pos$start))),]
dim(DE_pos)
dim(DE_pos_no_na)



#inv info
annot_inv<-read.table("annotation_inv_definitive.txt", header=TRUE)
head(annot_inv)
colnames(annot_inv)[1]<-"LG"

#plot info
DE_pos_no_na$pos_mean<-(DE_pos_no_na$start+DE_pos_no_na$stop)/2
DE_pos2<-DE_pos_no_na[-grep("scaffold",DE_pos_no_na$LG),]
DE_pos2$LG<-as.factor(as.character(DE_pos2$LG))

DE_adults = DE_pos2
```

#merge for males
```{r}
#DE males
res_Mgeno$id_transcript = rownames(res_Mgeno)
DE_pos<-left_join(res_Mgeno, genome_annot2)
head(DE_pos)

DE_pos_no_na<-DE_pos[-(which(is.na(DE_pos$start))),]
dim(DE_pos)
dim(DE_pos_no_na)



#inv info
annot_inv<-read.table("annotation_inv_definitive.txt", header=TRUE)
head(annot_inv)
colnames(annot_inv)[1]<-"LG"

#plot info
DE_pos_no_na$pos_mean<-(DE_pos_no_na$start+DE_pos_no_na$stop)/2
DE_pos2<-DE_pos_no_na[-grep("scaffold",DE_pos_no_na$LG),]
DE_pos2$LG<-as.factor(as.character(DE_pos2$LG))

DE_males = DE_pos2

```

#merge for females
```{r}
#DE females
res_Fgeno$id_transcript = rownames(res_Fgeno)
DE_pos<-left_join(res_Fgeno, genome_annot2)
head(DE_pos)

DE_pos_no_na<-DE_pos[-(which(is.na(DE_pos$start))),]
dim(DE_pos)
dim(DE_pos_no_na)



#inv info
annot_inv<-read.table("annotation_inv_definitive.txt", header=TRUE)
head(annot_inv)
colnames(annot_inv)[1]<-"LG"

#plot info
DE_pos_no_na$pos_mean<-(DE_pos_no_na$start+DE_pos_no_na$stop)/2
DE_pos2<-DE_pos_no_na[-grep("scaffold",DE_pos_no_na$LG),]
DE_pos2$LG<-as.factor(as.character(DE_pos2$LG))

DE_females = DE_pos2

```

#merge for larvae
```{r}
#DE larvae
res_geno_AAvsBB$id_transcript = rownames(res_geno_AAvsBB)
DE_pos<-left_join(res_geno_AAvsBB, genome_annot2)
head(DE_pos)

DE_pos_no_na<-DE_pos[-(which(is.na(DE_pos$start))),]
dim(DE_pos)
dim(DE_pos_no_na)


#inv info
annot_inv<-read.table("annotation_inv_definitive.txt", header=TRUE)
head(annot_inv)
colnames(annot_inv)[1]<-"LG"

#plot info
DE_pos_no_na$pos_mean<-(DE_pos_no_na$start+DE_pos_no_na$stop)/2
DE_pos2<-DE_pos_no_na[-grep("scaffold",DE_pos_no_na$LG),]
DE_pos2$LG<-as.factor(as.character(DE_pos2$LG))

DE_larvae = DE_pos2 

```

#generate across the genome figures
```{r}
adults1<-ggplot(DE_adults, aes(x=pos_mean/1000000, y=log2FoldChange,colour=-log(padj))) +
  geom_point(alpha=0.5, aes(size=-log(padj))) + 
  facet_grid(cols=vars(LG), scales = "free_x", space="free_x")+
  scale_colour_gradientn(colours=c("grey","turquoise","blue","navyblue","black"))+
  theme_classic()+ theme(  axis.text.x = element_text(size=12), axis.title.x = element_text(size=14),axis.text.y = element_text(size=12), axis.title.y = element_text(size=14), strip.text.x = element_text(size=12)) +
  labs( y = "Logfold Change"~paste(alpha,alpha)~"vs."~paste(beta,beta)~"Adults", x= "Position (in Mb)", color="-Log(padj)", size="-Log(padj)") + geom_vline( data=annot_inv[1,], aes(xintercept = start/1000000), linetype="dotted", size=1,show.legend = FALSE, colour="darkorchid1")+ 
  geom_vline( data=annot_inv[1,], aes(xintercept = stop/1000000), linetype="dotted", size=1,show.legend = FALSE, colour="darkorchid1") + ylim(-20,20)

library(grid)
library(gtable)

gt_a = ggplot_gtable(ggplot_build(adults1))
gt_a$widths[15] = 4*gt_a$widths[15]
adults2=grid.draw(gt_a)
grid.draw(gt_a)


males1<-ggplot(DE_males, aes(x=pos_mean/1000000, y=log2FoldChange,colour=-log(padj))) +
  geom_point(alpha=0.5, aes(size=-log(padj))) + 
  facet_grid(cols=vars(LG), scales = "free_x", space="free_x")+
  scale_colour_gradientn(colours=c("grey","turquoise","blue","navyblue","black"))+
  theme_classic()+ theme(legend.position = "none", axis.text.x = element_text(size=12), axis.title.x = element_text(size=14),axis.text.y = element_text(size=12), axis.title.y = element_text(size=14), strip.text.x = element_text(size=12)) +
  labs( y = "Logfold Change"~paste(alpha,alpha)~"vs."~paste(beta,beta), x= "Position (in Mb)", color="-Log(padj)", size="-Log(padj)") + geom_vline( data=annot_inv[1,], aes(xintercept = start/1000000), linetype="dotted", size=1,show.legend = FALSE, colour="darkorchid1")+ 
  geom_vline( data=annot_inv[1,], aes(xintercept = stop/1000000), linetype="dotted", size=1,show.legend = FALSE, colour="darkorchid1")  + ylim(-20,20)


library(grid)
library(gtable)

gt_m = ggplot_gtable(ggplot_build(males1))
gt_m$widths[15] = 4*gt_m$widths[15]
males2=grid.draw(gt_m)
grid.draw(gt_m)


females1<-ggplot(DE_females, aes(x=pos_mean/1000000, y=log2FoldChange,colour=-log(padj))) +
  geom_point(alpha=0.5, aes(size=-log(padj))) + 
  facet_grid(cols=vars(LG), scales = "free_x", space="free_x")+
  scale_colour_gradientn(colours=c("grey","turquoise","blue","navyblue","black"))+
  theme_classic()+ theme(legend.position = "none",  axis.text.x = element_text(size=12), axis.title.x = element_text(size=14),axis.text.y = element_text(size=12), axis.title.y = element_text(size=14), strip.text.x = element_text(size=12)) +
  labs( y = "Logfold Change"~paste(alpha,alpha)~"vs."~paste(beta,beta), x= "Position (in Mb)", color="-Log(padj)", size="-Log(padj)") + geom_vline( data=annot_inv[1,], aes(xintercept = start/1000000), linetype="dotted", size=1,show.legend = FALSE, colour="darkorchid1")+ 
  geom_vline( data=annot_inv[1,], aes(xintercept = stop/1000000), linetype="dotted", size=1,show.legend = FALSE, colour="darkorchid1")  + ylim(-20,20)


library(grid)
library(gtable)
library(gridExtra)


gt_f = ggplot_gtable(ggplot_build(females1))
gt_f$widths[15] = 4*gt_f$widths[15]
females2=grid.draw(gt_f)
grid.draw(gt_f)



larvae1<-ggplot(DE_larvae, aes(x=pos_mean/1000000, y=log2FoldChange,colour=-log(padj))) +
  geom_point(alpha=0.5, aes(size=-log(padj))) + 
  facet_grid(cols=vars(LG), scales = "free_x", space="free_x")+
  scale_colour_gradientn(colours=c("grey","turquoise","blue","navyblue","black"))+
  theme_classic()+ theme(legend.position = "none", axis.text.x = element_text(size=12), axis.title.x = element_text(size=14),axis.text.y = element_text(size=12), axis.title.y = element_text(size=14), strip.text.x = element_text(size=12)) +
  labs( y = "Logfold Change"~paste(alpha,alpha)~"vs."~paste(beta,beta), x= "Position (in Mb)", color="-Log(padj)", size="-Log(padj)") + geom_vline( data=annot_inv[1,], aes(xintercept = start/1000000), linetype="dotted", size=1,show.legend = FALSE, colour="darkorchid1")+ 
  geom_vline( data=annot_inv[1,], aes(xintercept = stop/1000000), linetype="dotted", size=1,show.legend = FALSE, colour="darkorchid1")  + ylim(-20,20)


gt_l = ggplot_gtable(ggplot_build(larvae1))
gt_l$widths[15] = 4*gt_l$widths[15]
larvae2 = grid.draw(gt_l)
grid.draw(gt_l)

```


#density of logfold changes
```{r}

library(gapminder)
library(patchwork)
library(plyr)


ggplotColours <- function(n = 6, h = c(0, 360) + 15){
  if ((diff(h) %% 360) < 1) h[2] <- h[2] - 360/n
  hcl(h = (seq(h[1], h[2], length = n)), c = 100, l = 65)
}

test=ggplotColours(n=3)

#prep data
positions = read.table("positions.csv", sep=",", header=TRUE)


res_Mgeno$Contig = rownames(res_Mgeno)
res_Fgeno$Contig = rownames(res_Fgeno)
res_geno_AAvsBB$Contig = rownames(res_geno_AAvsBB)

res_Mgeno$type = "Males"
res_Fgeno$type = "Females"
res_geno_AAvsBB$type = "Larvae"




all = rbind(res_Fgeno, res_Mgeno,res_geno_AAvsBB)

both = merge(all, positions, by=c("Contig"))

both$pos=as.character(both$pos)
both$pos[which(both$pos!="LG1_inversion")]<-"Collinear"



ks.test(all$log2FoldChange, all$log2FoldChange)




justinv = subset(both, both$pos=="LG1_inversion")

fall= both[both$type=="Females",]
lall = both[both$type=="Larvae",]
mall = both[both$type=="Males",]

finv= justinv[justinv$type=="Females",]
linv = justinv[justinv$type=="Larvae",]
minv = justinv[justinv$type=="Males",]

ks.test(mall$log2FoldChange,minv$log2FoldChange)


mu <- ddply(na.omit(both), "type", summarise, grp.mean=mean(log2FoldChange))

mu2 <- ddply(na.omit(justinv), "type", summarise, grp.mean=mean(log2FoldChange))




dens_l = ggplot(lall, aes(x=log2FoldChange, color=pos)) + scale_color_manual(values=c("grey55", "darkorchid1")) + geom_density(size=1) + ylim(0,0.7) + xlim(-7.5,7.5) + geom_vline(data=mu, aes(xintercept=-0.57452673),linetype="dashed", size=1, color="grey55") +  geom_vline(data=mu2, aes(xintercept=-0.6987221),linetype="dashed", size=1, color="darkorchid1") +  labs(y="Density") + theme_bw() + theme(legend.position = "none", axis.text.x = element_text(size=12), axis.title.x = element_text(size=14),axis.text.y = element_text(size=12), axis.title.y = element_text(size=14))

dens_m = ggplot(res_Mgeno, aes(x=log2FoldChange)) +
  geom_density(size=1, color="grey55") + ylim(0,0.7) + xlim(-7.5,7.5) + geom_vline(data=mu, aes(xintercept=-0.17746462),color="grey55",linetype="dashed", size=1) + labs(y="Density") + theme_bw() + theme(legend.position = "none",  axis.text.x = element_text(size=12), axis.title.x = element_text(size=14),axis.text.y = element_text(size=12), axis.title.y = element_text(size=14))


dens_f = ggplot(res_Fgeno, aes(x=log2FoldChange)) +
  geom_density(size=1, color="grey55") + ylim(0,0.7) + xlim(-7.5,7.5) + geom_vline(data=mu, aes(xintercept=-0.17746462),color="grey55",linetype="dashed", size=1) + labs(y="Density") + theme_bw() + theme(legend.position = "none", axis.text.x = element_text(size=12), axis.title.x = element_text(size=14),axis.text.y = element_text(size=12), axis.title.y = element_text(size=14))



dens_l_inv = ggplot(justinv[justinv$type=="Larvae",], aes(x=log2FoldChange)) +
  geom_density(size=1, color="darkorchid1") + ylim(0,0.7) + xlim(-7.5,7.5) + geom_vline(data=mu, aes(xintercept=-0.57452673),linetype="dashed", size=1, color="darkorchid1") + labs(y="Density") + theme_bw() + theme(legend.position = "none", axis.text.x = element_text(size=12), axis.title.x = element_text(size=14),axis.text.y = element_text(size=12), axis.title.y = element_text(size=14))

dens_m_inv = ggplot(justinv[justinv$type=="Males",], aes(x=log2FoldChange)) +
  geom_density(size=1, color="darkorchid1") + ylim(0,0.7) + xlim(-7.5,7.5) + geom_vline(data=mu, aes(xintercept=-0.17746462),color="darkorchid1",linetype="dashed", size=1) + labs(y="Density") + theme_bw() + theme(legend.position = "none",  axis.text.x = element_text(size=12), axis.title.x = element_text(size=14),axis.text.y = element_text(size=12), axis.title.y = element_text(size=14))


dens_f_inv = ggplot(justinv[justinv$type=="Females",], aes(x=log2FoldChange)) +
  geom_density(size=1, color="darkorchid1") + ylim(0,0.7) + xlim(-7.5,7.5) + geom_vline(data=mu, aes(xintercept=-0.17746462),color="darkorchid1",linetype="dashed", size=1) + labs(y="Density") + theme_bw() + theme(legend.position = "none", axis.text.x = element_text(size=12), axis.title.x = element_text(size=14),axis.text.y = element_text(size=12), axis.title.y = element_text(size=14))







dens_inv = ggplot(justinv, aes(x=log2FoldChange, color=type)) +
  geom_density() + ylim(0,0.7) + xlim(-7.5,7.5) + geom_vline(data=mu2, aes(xintercept=grp.mean, color=type),linetype="dashed") + labs(y="Density", color="Data Set", title = "Log2FoldChanges within the inverted region") + theme_bw()



#overlapping
dens_l = ggplot(lall, aes(x=log2FoldChange, color=pos)) + scale_color_manual(values=c("grey55", "darkorchid1")) + geom_density(size=1) + ylim(0,0.7) + xlim(-7.5,7.5) +  labs(y="Density") + theme_bw() + theme( legend.position = "none",axis.text.x = element_text(size=12), axis.title.x = element_text(size=14),axis.text.y = element_text(size=12), axis.title.y = element_text(size=14))


dens_m = ggplot(mall, aes(x=log2FoldChange, color=pos)) + scale_color_manual(values=c("grey55", "darkorchid1")) + geom_density(size=1) + ylim(0,0.7) + xlim(-7.5,7.5) +  labs(y="Density") + theme_bw() + theme(legend.position = "none", axis.text.x = element_text(size=12), axis.title.x = element_text(size=14),axis.text.y = element_text(size=12), axis.title.y = element_text(size=14))


dens_f = ggplot(fall, aes(x=log2FoldChange, color=pos)) + scale_color_manual(values=c("grey55", "darkorchid1")) + geom_density(size=1) + ylim(0,0.7) + xlim(-7.5,7.5) +  labs(y="Density") + theme_bw() + theme(legend.position = "none", axis.text.x = element_text(size=12), axis.title.x = element_text(size=14),axis.text.y = element_text(size=12), axis.title.y = element_text(size=14))

other_legend = ggplot(lall, aes(x=log2FoldChange, color=pos)) + scale_color_manual(values=c("grey55", "darkorchid1"), labels=c("Whole Genome", expression(italic("Cf-Inv(1)")))) + geom_density(size=1) + ylim(0,0.7) + xlim(-7.5,7.5) +  labs(y="Density") + theme_bw() + theme( axis.text.x = element_text(size=12), axis.title.x = element_text(size=14),axis.text.y = element_text(size=12), axis.title.y = element_text(size=14), legend.title = element_blank())


```

#make big figure
```{r}
library(cowplot)
library(ggpubr)
library(patchwork)


legend_b2 <- get_legend(adults1 +theme(legend.direction="horizontal",legend.position="bottom"))


legend_b3 <- get_legend(other_legend +theme(legend.direction="horizontal",legend.position="bottom"))



#with overlapping density plots


across = plot_grid(plot_grid(gt_f, gt_m, gt_l, nrow=3, labels=c("A", "C","E")), legend_b2, ncol = 1, rel_heights  = c(1, .075),align = "hv")


dens = plot_grid(plot_grid(dens_f, dens_m, dens_l, nrow=3, labels=c("B", "D","F")), legend_b3, ncol = 1, rel_heights  = c(1, .075),align = "hv")


m = plot_grid(gt_m, dens_m, nrow=1, rel_heights  = c(1, .7), rel_widths = c(1,0.4),labels=c("C","D"))


l = plot_grid(gt_l, dens_l, nrow=1, rel_heights  = c(1, .7), rel_widths = c(1,0.4),labels=c("E","F"))




genome = plot_grid(across,dens, ncol=2, align="vh",labels="",rel_widths = c(1,0.4)) 

pdf("Figure3.pdf",width=12,height=12)
genome
dev.off()



```

